
library(tidyverse)

# set replication folder as working directory
setwd("~replication")

load("data_genderedcost_long.rdata")
load("data_genderedcost_background.rdata")

df_long <- df_long %>% 
  filter(SurveyStatus==2) %>% 
  filter(SurveyEndTime<="2021-12-20 21:49:52")

df_background <- df_background %>% 
  filter(SurveyEndTime<="2021-12-20 21:49:52")

### FIGURE G1

## plot for victimhood intensity distribution in sample and men/women 
sample_shares <- df_background %>%
  count(victimhood_intensity) %>% 
  mutate(prop_victim = prop.table(n)) %>% 
  mutate(sex = factor("Full Sample"))

subset_shares <- df_background %>% 
  count(sex, victimhood_intensity) %>% 
  group_by(sex) %>% 
  mutate(prop_victim = prop.table(n)) %>% 
  mutate(sex = factor(ifelse(sex=="Man","Men","Women")))

# add zero column for men with 5 types of experiences (so bars have same width in the plot)
extra_row <- tibble(victimhood_intensity = 5, n = 0, prop_victim = 0, sex = "Men")

shares <- bind_rows(sample_shares, subset_shares, extra_row) %>% 
  mutate(victimhood_intensity = factor(victimhood_intensity))

shares %>% 
  filter(victimhood_intensity!="0") %>% 
  ggplot(data=., aes(x=victimhood_intensity, y=prop_victim, fill = sex)) +
  geom_col(color = "black", position = position_dodge2(width = 0.2)) +
  theme_bw() +
  ylab("Share of victims") +
  xlab("# types of harassment experienced") +
  scale_fill_grey("", drop = FALSE) +
  coord_cartesian(ylim=c(0,0.2)) +
  scale_y_continuous(labels = seq(0,1,0.02), breaks = seq(0,1,0.02)) +
  geom_text(aes(label = round(prop_victim,digits = 2)),
            position = position_dodge(width = 0.9), vjust = -0.5, size = 3) +
  theme(legend.position = "right",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank())

ggsave("figureG1.pdf", height = 4, width = 8)

### FIGURE G2
## create a cumulative plot of victimhood intensity (essentially showing the distribution of victims given more strict definitions)
## cumulate props and then create plots (without a column for non-victims who are the benchmark to the numbers of all other columns)

shares %>% 
  arrange(desc(victimhood_intensity)) %>% 
  group_by(sex) %>%
  mutate(cumulative_prop_victim = cumsum(prop_victim)) %>% 
  filter(victimhood_intensity!=0) %>% 
  ggplot(data=., aes(x=victimhood_intensity, y=cumulative_prop_victim, fill = sex)) +
  geom_col(color = "black", position = position_dodge2(width = 0.2)) +
  theme_bw() +
  ylab("Share of victims") +
  xlab("Minimum # of types of harassment experienced") +
  scale_fill_grey("") +
  scale_color_grey("") +
  scale_y_continuous(labels = seq(0,1,0.05), breaks = seq(0,1,0.05)) +
  geom_text(aes(label = round(prop_victim,digits = 2)),
            position = position_dodge(width = 0.9), vjust = -0.5, size = 3) +
  theme(legend.position = "right",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank())

ggsave("figureG2.pdf", height = 4, width = 8)



### FIGURE G3

# 1 type - traditional victims
df_victims <- df_long %>% 
  dplyr::filter(victim=="Victim")

mm_victims <- cregg::mm(df_victims, # data
                        choice~position + remuneration + workload + work_environment, # formula
                        id = id)

mm_victims$victim <- "Victim" # add victim variable
mm_victims$intensity <- "1"

df_nonvictims <- df_long %>% 
  dplyr::filter(victim=="Non-victim")

mm_nonvictims <- cregg::mm(df_nonvictims, # data
                           choice~position + remuneration + workload + work_environment, # formula
                           id = id)

mm_nonvictims$victim <- "Non-victim" # add victim variable
mm_nonvictims$intensity <- "1"

# 2 TYPES
df_victims_2 <- df_long %>% 
  dplyr::filter(victim_2=="Victim")

mm_victims_2 <- cregg::mm(df_victims_2, # data
                          choice~position + remuneration + workload + work_environment, # formula
                          id = id)

mm_victims_2$victim <- "Victim" # add victim variable
mm_victims_2$intensity <- "2"

df_nonvictims_2 <- df_long %>% 
  dplyr::filter(victim_2=="Non-victim")

mm_nonvictims_2 <- cregg::mm(df_nonvictims_2, # data
                             choice~position + remuneration + workload + work_environment, # formula
                             id = id)

mm_nonvictims_2$victim <- "Non-victim" # add victim variable
mm_nonvictims_2$intensity <- "2"

# 3 TYPES
df_victims_3 <- df_long %>% 
  dplyr::filter(victim_3=="Victim")

mm_victims_3 <- cregg::mm(df_victims_3, # data
                          choice~position + remuneration + workload + work_environment, # formula
                          id = id)

mm_victims_3$victim <- "Victim" # add victim variable
mm_victims_3$intensity <- "3"

df_nonvictims_3 <- df_long %>% 
  dplyr::filter(victim_3=="Non-victim")

mm_nonvictims_3 <- cregg::mm(df_nonvictims_3, # data
                             choice~position + remuneration + workload + work_environment, # formula
                             id = id)

mm_nonvictims_3$victim <- "Non-victim" # add victim variable
mm_nonvictims_3$intensity <- "3"

#combine estimates from the 6 subsets, add attribute variable for facetting and rearrange order of levels
mm_victim_subsets <- bind_rows(mm_victims, mm_nonvictims,
                               mm_victims_2, mm_nonvictims_2,
                               mm_victims_3, mm_nonvictims_3) %>% 
  mutate(feature = case_when(feature=="work_environment"~"Working Environment",
                             feature=="workload"~"Workload",
                             feature=="remuneration"~"Remuneration",
                             feature=="position"~"Position")) %>% 
  mutate(feature = factor(feature, levels = c("Position", "Remuneration", "Workload", "Working Environment")))

## plot
mm_victim_subsets %>% 
  ggplot(data=., aes(y = level, x = estimate, color = intensity, shape = victim)) +
  geom_point(position = position_dodge2(width = 1)) +
  geom_linerange(aes(xmin=estimate-std.error*1.45,
                     xmax=estimate+std.error*1.45),
                 position = position_dodge2(width = 1)) +
  xlab("Marginal mean") +
  ylab("") +
  scale_x_continuous(labels = seq(0.3,0.8,0.05), breaks = seq(0.3,0.8,0.05)) +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  theme_bw() +
  scale_color_grey("", start = 0.1, end = 0.65) +
  scale_shape_discrete("") +
  facet_wrap(~feature, scales = "free_y", ncol = 1) +
  theme(legend.position = "top", panel.background = element_rect(fill = "white"),
        strip.background = element_rect("white"),
        strip.text = element_text(hjust = 0, face = "bold"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggsave("figureG3.pdf", height = 8, width = 8)
